miRNA selected

  • miRNA 210-3p
  • miRNA 320a
  • miRNA 320b
  • miRNA 4745-5p
celfiles<-list.celfiles("./cel_files/",full.names=TRUE)
data<-read.celfiles(celfiles)
## Reading in : ./cel_files//A1H_(miRNA-4_0).CEL
## Reading in : ./cel_files//A2H_(miRNA-4_0).CEL
## Reading in : ./cel_files//A3H_(miRNA-4_0).CEL
## Reading in : ./cel_files//C1H_(miRNA-4_0).CEL
## Reading in : ./cel_files//C2H_(miRNA-4_0).CEL
## Reading in : ./cel_files//C3H_(miRNA-4_0).CEL
## Reading in : ./cel_files//F10_(miRNA-4_0).CEL
## Reading in : ./cel_files//F11_(miRNA-4_0).CEL
## Reading in : ./cel_files//K11_(miRNA-4_0).CEL
## Reading in : ./cel_files//K13_(miRNA-4_0).CEL
## Reading in : ./cel_files//L13_(miRNA-4_0).CEL
## Reading in : ./cel_files//L7_(miRNA-4_0).CEL
## Reading in : ./cel_files//P1_(miRNA-4_0).CEL
## Reading in : ./cel_files//P2_(miRNA-4_0).CEL
## Reading in : ./cel_files//P3_(miRNA-4_0).CEL
# Anotació fenotipica
pheno <- read.csv(file="./cel_files/Registremostres.csv",sep=";")

pheno<-pheno %>%
  mutate(NaCl_envellides=ifelse(NaCl_envellides =="si"&PBS_NaCl=="si",1,
                                ifelse(NaCl_envellides =="si"&PBS_NaCl=="no",2,"no")))
datatable_jm<-function(x,column=NULL){
  
  # if(is.na(column)){column<-0}

datatable(
  x,
  extensions = 'Buttons', 
  filter = list(
    position = 'top', clear = T
  ),
  options = list(dom = 'Blfrtip',buttons = list(list(extend = 'colvis')),
                 buttons = list('copy', 'print',
                                list(extend = 'collection',
                                     buttons = c('csv', 'excel', 'pdf'),
                                     text = 'Download')),
                 columnDefs = list(list(visible=FALSE, targets=column))))}
pheno$nom<-gsub("_[(]miRNA-4_0).CEL","",pheno$Nombre.Cel.file)
rownames(pheno)<-pheno$Nombre.Cel.file
# phenoData(data) <- AnnotatedDataFrame(pheno)
pheno_ano<-AnnotatedDataFrame(pheno)

data<-read.celfiles(celfiles, phenoData = pheno_ano)
## Reading in : ./cel_files//A1H_(miRNA-4_0).CEL
## Reading in : ./cel_files//A2H_(miRNA-4_0).CEL
## Reading in : ./cel_files//A3H_(miRNA-4_0).CEL
## Reading in : ./cel_files//C1H_(miRNA-4_0).CEL
## Reading in : ./cel_files//C2H_(miRNA-4_0).CEL
## Reading in : ./cel_files//C3H_(miRNA-4_0).CEL
## Reading in : ./cel_files//F10_(miRNA-4_0).CEL
## Reading in : ./cel_files//F11_(miRNA-4_0).CEL
## Reading in : ./cel_files//K11_(miRNA-4_0).CEL
## Reading in : ./cel_files//K13_(miRNA-4_0).CEL
## Reading in : ./cel_files//L13_(miRNA-4_0).CEL
## Reading in : ./cel_files//L7_(miRNA-4_0).CEL
## Reading in : ./cel_files//P1_(miRNA-4_0).CEL
## Reading in : ./cel_files//P2_(miRNA-4_0).CEL
## Reading in : ./cel_files//P3_(miRNA-4_0).CEL
# RMA
data_rma<-rma(data)
## Background correcting
## Normalizing
## Calculating Expression
library(dplyr)

df <- read.csv("/home/SHARED/annotation_affymetrix/miRNA-4_0-st-v1.annotations.20160922.csv",comment.char = "#")

df<-
df %>% 
  group_by(Accession) %>% 
  mutate(gene=paste(Accession, collapse="|"))

feat<-df[match(rownames(data_rma@featureData@data),df$Probe.Set.Name),]
feat<-AnnotatedDataFrame(feat)
rownames(feat@data)<-
rownames(data_rma@featureData)

data_rma@featureData<-feat

dup.ids <- feat@data$Accession[duplicated(feat@data$Accession)] %>% 
  unique %>%
  sort

data_rma<-
data_rma[,pData(data_rma)$temps!=2|is.na(pData(data_rma)$temps)]
data_rma<-
data_rma[data_rma@featureData@data$Species.Scientific.Name=="Homo sapiens",]
data_rma<-data_rma[data_rma@featureData@data$Sequence.Type=="miRNA",]
exp_rma <- exprs(data_rma)

DEG

# Filtre
comparativa<-"NaCl_envellides"

data_rma<-data_rma[,data_rma@phenoData@data[,comparativa]!="no"]
# mostres: 
# mostres<-c("C2H","C1H","C3H","A1H","A2H","A3H")
# data_rma_f<-data_rma[,data_rma@phenoData@data$nom%in%mostres]

g1<-levels(as.factor(data_rma@phenoData@data[,comparativa]))[1]
g2<-levels(as.factor(data_rma@phenoData@data[,comparativa]))[2]

data_rma@phenoData@data[,comparativa]<-gsub(g1,"NaCl",data_rma@phenoData@data[,comparativa])
data_rma@phenoData@data[,comparativa]<-gsub(g2,"envellides",data_rma@phenoData@data[,comparativa])
g1<-"NaCl"
g2<-"envellides"
data_rma<-
data_rma[data_rma@featureData@data$Species.Scientific.Name=="Homo sapiens",]
data_rma<-data_rma[data_rma@featureData@data$Sequence.Type=="miRNA",]
data_rma_ind<-data_rma
exp_rma <- exprs(data_rma_ind)

phenotype_names <- ifelse(str_detect(pData(data_rma)[,comparativa],g1), g1, g2)

annotation_for_heatmap <- data.frame(Phenotype = phenotype_names)

row.names(annotation_for_heatmap) <- pData(data_rma_ind)$Id_de_la_muestra

dists <- as.matrix(dist(t(exp_rma), method = "manhattan"))
rownames(dists) <- pData(data_rma_ind)$Id_de_la_muestra
hmcol <- rev(colorRampPalette(RColorBrewer::brewer.pal(9, "YlOrRd"))(255))
colnames(dists) <- NULL
diag(dists) <- NA

ann_colors <- list(
  Phenotype = c(g1 = "chartreuse4", g2 = "burlywood3")
  
                   )

names(ann_colors$Phenotype)<-c(g1,g2)
# png(paste0("resultats/","/PLOTS/heatmap.png"),width = 800,height = 800,res=150)
pheatmap(dists, col = (hmcol), 
         
         annotation_col = annotation_for_heatmap,
         annotation_colors = ann_colors,
         legend = TRUE, 
         treeheight_row = 10,
         legend_breaks = c(min(dists, na.rm = TRUE), 
                         max(dists, na.rm = TRUE)), 
         legend_labels = (c("small distance", "large distance")),
         main = "Heatmap calibrated samples")

.tmp<-dev.off()
# knitr::include_graphics("resultats/PLOTS/heatmap.png")

Taula

groups = phenotype_names
f = factor(groups)
design = model.matrix(~ 0 + f)
colnames(design) = c(g1,g2)
data.fit = lmFit(exprs(data_rma_ind),design)

lev <- c(g1, g2)



# Parsing
a<-c(g1,"-",g2)
astr=paste(a, collapse="")
prestr="makeContrasts("
poststr=",levels=design)"
commandstr=paste(prestr,astr,poststr,sep="")





contrast.matrix=eval(parse(text=commandstr))
# colnames(contrast.matrix)<-paste(g1,"-",g2)
data.fit.con = contrasts.fit(data.fit,contrast.matrix)
data.fit.eb = eBayes(data.fit.con)

tab = topTable(data.fit.eb,coef=1,lfc = log2(1.5),number=Inf,adjust.method="BH",genelist = data_rma_ind@featureData@data )   
tab_all = topTable(data.fit.eb,coef=1,number=Inf,adjust.method="BH",genelist = data_rma_ind@featureData@data )   

cols<-c("logFC","AveExpr")
cols1<-c("P.Value", "adj.P.Val")



FC_tab<-
tab %>% filter(P.Value<=0.05,abs(logFC)>=log2(1.5)) %>% 
   dplyr::select(-c(GeneChip.Array,Annotation.Date,Sequence,Sequence.Source,Probe.Set.ID,B,t,Probe.Set.Name,Alignments,Clustered.miRNAs.within.10kb,Genome.Context,Target.Genes)) %>% 
  mutate(across(cols, round, 3)) %>% 
  mutate(across(all_of(cols1), format.pval))



datatable_mod4(FC_tab,"FC")

Volcano

tab_all$diffexpressed <- "NO"
# if log2Foldchange > 0.6 and pvalue < 0.05, set as "UP" 
tab_all$diffexpressed[tab_all$logFC > log2(1.5) & tab_all$P.Value < 0.05] <- "UP"
# if log2Foldchange < -0.6 and pvalue < 0.05, set as "DOWN"
tab_all$diffexpressed[tab_all$logFC < -log2(1.5) & tab_all$P.Value < 0.05] <- "DOWN"


tab_all$delabel <- NA
tab_all$delabel[tab_all$diffexpressed != "NO"] <- tab_all$Transcript.ID.Array.Design.[tab_all$diffexpressed != "NO"]

# plot adding up all layers we have seen so far
ggplot(data=tab_all, aes(x=logFC, y=-log10(P.Value), col=diffexpressed,
                      label=delabel))+
        geom_point() + 
        theme_minimal() +
         geom_text_repel() +
        scale_color_manual(values=c("blue", "black", "red")) +
        geom_vline(xintercept=c(-0.6, 0.6), col="red") +
        geom_hline(yintercept=-log10(0.05), col="red")

Heatmap

gens_SIG<-tab %>% filter(P.Value<=0.05,abs(logFC)>=log2(1.5)) %>% 
  .$Probe.Set.Name


data_rma_SIG<-data_rma[data_rma@featureData@data$Probe.Set.Name%in%gens_SIG,]
data_rma_SIG_df<-exprs(data_rma_SIG)
rownames(data_rma_SIG_df)<-data_rma_SIG@featureData@data$Transcript.ID.Array.Design.
hmcol <- rev(colorRampPalette(RColorBrewer::brewer.pal(5, "YlOrRd"))(255))
colnames(dists) <- NULL
diag(dists) <- NA

ann_colors <- list(
  Phenotype = c(g1 = "chartreuse4", g2 = "burlywood3")
  
                   )
names(ann_colors$Phenotype)<-c(g1,g2)

p1<-pheatmap(
  data_rma_SIG_df,cutree_rows = 2,cutree_cols = 2,
         annotation_col = annotation_for_heatmap,
         annotation_colors = ann_colors,scale = "row",
         legend = F, 
         treeheight_row = 50,
         legend_breaks = c(min(dists, na.rm = TRUE), 
                         max(dists, na.rm = TRUE)), 
         legend_labels = (c("small distance", "large distance")),
         main = "miRNA FC>1.5 & p<=0.05")

p1

Find Taregets

library(multiMiR)
mirna_int<-c("hsa-miR-4745-5p")

targets <- get_multimir(mirna = mirna_int, summary = T)
## Searching mirecords ...
## Searching mirtarbase ...
## Searching tarbase ...

Mapping

As a first step, we map the gene names to the STRING database identifiers using the map method.

The map function adds an additional column with STRING identifiers to the dataframe that is passed as first parameter.

library(STRINGdb)

# string_db <- STRINGdb$new(version = "11.5", species = 9606, score_threshold = 200, input_directory="")
# example1_mapped <- string_db$map(targets@summary, "target_symbol", removeUnmappedRows = TRUE )

# save(string_db,file = "string_db")
# save(example1_mapped,file = "example1_mapped")
load("string_db")
load("example1_mapped")

dim(example1_mapped)
## [1] 242  10
hits <- example1_mapped$STRING_id
hits_entrez <- example1_mapped$target_entrez
# head(subset(example1_mapped, log10(pvalue) >= -log10(0.01) | abs(logFC) >= 0.5))

Network proteins

string_db$plot_network(hits,add_summary = T)

Cluster

Returns a list of clusters of interacting proteins.

# clustersList <- string_db$get_clusters(example1_mapped$STRING_id)
# save(clustersList,file="clustersList")
load("clustersList")

for( i in 1:length(clustersList)){
if  (length(clustersList[[i]])>1){
  print(string_db$plot_network(clustersList[[i]]))
}}

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

Perform GO and KEGG pathways enrichment analyses

hits_universe_clean<-gsub("9606.","",string_db$proteins$protein_external_id)
STRING_id_clean<-gsub("9606.","",example1_mapped$STRING_id)



enrichmentGO <- string_db$get_enrichment( hits, category = "Process" )
  
hits_clean<-gsub("9606.","",hits)
go_net <- enrichGO(gene          = hits_entrez,
                    # universe = hits_entrez,
                   OrgDb         = org.Hs.eg.db,
                   ont           = "BP",
               # keyType = "ENSEMBLPROT",
               pAdjustMethod = "fdr",
               pvalueCutoff  = 0.05,
               qvalueCutoff  = 0.2,
        readable      = TRUE)

go_net_df<-go_net@result %>% filter(p.adjust<=0.05)
datatable_jm(enrichmentGO,column = c("preferredNames","inputGenes"))
datatable_jm(go_net_df,column = c("geneID"))
go_net_df$ID%in%enrichmentGO$term
## [1]  TRUE FALSE  TRUE  TRUE
enrichmentKEGG <- string_db$get_enrichment( hits, category = "KEGG" )
datatable_jm(enrichmentKEGG)